This document is structured as follows:

Research question to answer

Train and test R2 performances O2PLS/PLS/PO2PLS

load('~/MEGANZ/LUMC dingen/LUMC/PhD/Paper 4 PO2PLS/PO2PLS_Software/R2outp_all.RData')
R2outp %<>% mutate(type=str_split(key, '_')%>%sapply(`[[`,1), method = str_split(key, '_')%>%sapply(`[[`,2)) %>% select(-key)
p <- ggplot(data=R2outp%>%filter(method!="ppls"), aes(x=method, y=sqrt(value))) +
  geom_boxplot(aes(col = type %>% factor(c('train','test')))) +
  geom_hline(yintercept=1) +
  facet_grid(N ~ p, scales = 'free') +
  theme_bw() + scale_x_discrete("Method") + scale_y_continuous("RMSEP") +
  theme(axis.title = element_text(face="bold", size=16)) +
  scale_color_discrete("Type")
ggplotly(p)
load('~/MEGANZ/LUMC dingen/LUMC/PhD/Paper 4 PO2PLS/PO2PLS_Software/R2outp_all2_nois.RData')
p <- ggplot(data=R2outp, aes(x=method, y=sqrt(value))) +
  geom_boxplot(aes(col = type %>% factor(c('train','test')))) +
  geom_hline(yintercept=1, lty=2,col='grey') +
  facet_grid(N+noise ~ p, scales = 'free') +
  theme_bw() + scale_x_discrete("Method") + scale_y_continuous("RMSEP") +
  theme(axis.title = element_text(face="bold", size=16)) +
  scale_color_discrete("Type")
ggplotly(p)
load('~/MEGANZ/LUMC dingen/LUMC/PhD/Paper 4 PO2PLS/PO2PLS_Software/R2outp_all35comp_nois.RData')
p <- ggplot(data=R2outp, aes(x=method, y=sqrt(value))) +
  geom_boxplot(aes(col = type %>% factor(c('train','test')))) +
  geom_hline(yintercept=1, lty=2,col='grey') +
  facet_grid(N+noise ~ p, scales = 'free') +
  theme_bw() + scale_x_discrete("Method") + scale_y_continuous("RMSEP") +
  theme(axis.title = element_text(face="bold", size=16)) +
  scale_color_discrete("Type")
ggplotly(p)

Correct top 25% in very high D

load('outp3_51.RData')
#outp3_555 <- parallelsugar::mclapply(mc.cores=parallel::detectCores(), X = 1:100, FUN = ff, p=1e3, N = 50, noise_alpha=0.9)
#invisible(gc())
outp3_555 %>% simplify2array %>% 
  apply(1,quantile, c(0.025, 0.5, 0.975))
##        O2PLS PO2PLS    PLS   PPLS
## 2.5%  0.2504 0.2552 0.2496 0.2304
## 50%   0.3024 0.3128 0.2992 0.2992
## 97.5% 0.3672 0.3896 0.3504 0.3616
#outp3_111 <- parallelsugar::mclapply(mc.cores=parallel::detectCores(), X = 1:100, FUN = ff, p=1e3, N = 50, noise_alpha=0.9, r=1, rx=1, ry=1)
#invisible(gc())
outp3_111 %>% simplify2array %>% 
  apply(1,quantile, c(0.025, 0.5, 0.975))
##       O2PLS PO2PLS   PLS  PPLS
## 2.5%  0.280  0.276 0.288 0.236
## 50%   0.408  0.528 0.396 0.328
## 97.5% 0.540  0.676 0.548 0.664
# save(outp3_555, outp3_111, file = 'outp3_51.RData')

load('~/MEGANZ/LUMC dingen/LUMC/PhD/Paper 4 PO2PLS/PO2PLS_Software/outpp_data550.RData')
sapply(outpp, `[[`, 5) %>% apply(1,quantile, c(0.025,0.5,0.975))
##         O2PLS PO2PLS    PLS    PPLS
## 2.5%  0.24872 0.2504 0.2507 0.25196
## 50%   0.28160 0.2940 0.2800 0.28080
## 97.5% 0.31296 0.3405 0.3120 0.33284

Train and test errors of covariance blocks Sx, Sxy, Sy

load('~/MEGANZ/LUMC dingen/LUMC/PhD/Paper 4 PO2PLS/PO2PLS_Software/R2outp_all2_nois.RData')
covnames <- str_split(names(covoutp), "_") %>% sapply(function(e) as.numeric(e[1:3])) %>% t %>% as_tibble
names(covnames) <- c("N", "p", "noise")
covoutp0 <- covoutp %>% lapply(function(e) lapply(e, function(ee) (ee%*%diag(1/ee[5,],3))[1:4,])) %>%
  unlist %>% unname %>% array(dim = c(4,3,2,length(names(covoutp))), dimnames=c(dimnames(covoutp$`2500_20_0.1`$train[-5,]),list(names(covoutp[[1]])),list(names(covoutp))))
covoutp0 %<>% (reshape2::melt) 
covoutp0 <- covoutp %>% lapply(function(e) lapply(e, function(ee) (ee)[1:5,])) %>%
 unlist %>% unname %>% array(dim = c(5,3,2,length(names(covoutp))),
   dimnames=c(dimnames(covoutp$`2500_20_0.1`$train),list(names(covoutp[[1]])),list(names(covoutp)))) %>%
  (reshape2::melt)
covoutp0$N <- covoutp0$Var4 %>% str_split('_') %>% sapply(function(e) e[[1]]) %>% as.numeric
covoutp0$p <- covoutp0$Var4 %>% str_split('_') %>% sapply(function(e) e[[2]])
covoutp0$noise <- covoutp0$Var4 %>% str_split('_') %>% sapply(function(e) e[[3]])
p <- covoutp0 %>% ggplot(aes(x=Var1, y=log(value))) + geom_point(aes(col=noise, shape=p)) + facet_grid(N ~ Var2+Var3, scales='free')
plotly::ggplotly(p)
load('~/MEGANZ/LUMC dingen/LUMC/PhD/Paper 4 PO2PLS/PO2PLS_Software/R2outp_all45comp_nois.RData')
covnames <- str_split(names(covoutp), "_") %>% sapply(function(e) as.numeric(e[1:3])) %>% t %>% as_tibble
names(covnames) <- c("N", "p", "noise")
covoutp0 <- covoutp %>% lapply(function(e) lapply(e, function(ee) (ee%*%diag(1/ee[5,],3))[1:4,])) %>%
  unlist %>% unname %>% array(dim = c(4,3,2,length(names(covoutp))), dimnames=c(dimnames(covoutp$`2500_20_0.1`$train[-5,]),list(names(covoutp[[1]])),list(names(covoutp))))
covoutp0 %<>% (reshape2::melt) 
covoutp0 <- covoutp %>% lapply(function(e) lapply(e, function(ee) (ee)[1:5,])) %>%
 unlist %>% unname %>% array(dim = c(5,3,2,length(names(covoutp))),
   dimnames=c(dimnames(covoutp$`2500_20_0.1`$train),list(names(covoutp[[1]])),list(names(covoutp)))) %>%
  (reshape2::melt)
covoutp0$N <- covoutp0$Var4 %>% str_split('_') %>% sapply(function(e) e[[1]]) %>% as.numeric
covoutp0$p <- covoutp0$Var4 %>% str_split('_') %>% sapply(function(e) e[[2]])
covoutp0$noise <- covoutp0$Var4 %>% str_split('_') %>% sapply(function(e) e[[3]])
p <- covoutp0 %>% ggplot(aes(x=Var1, y=log(value))) + geom_point(aes(col=noise, shape=p)) + facet_grid(N ~ Var2+Var3, scales='free')
plotly::ggplotly(p)
covUML <- list(Upp = covUpp, Med = covMed, Low = covLow)
covoutp0 <- covUML %>% #lapply(function(e0) lapply(function(e) lapply(e, function(ee) (ee)[1:5,]))) %>%
 unlist %>% unname %>% array(dim = c(5,3,2,length(names(covoutp)),3),
   dimnames=c(dimnames(covoutp$`2500_20_0.1`$train),
              list(names(covoutp[[1]])),list(names(covoutp)),
              list(names(covUML)))) %>%
  (reshape2::melt)
covoutp0$N <- covoutp0$Var4 %>% str_split('_') %>% sapply(function(e) e[[1]]) %>% as.numeric
covoutp0$p <- covoutp0$Var4 %>% str_split('_') %>% sapply(function(e) e[[2]])
covoutp0$noise <- covoutp0$Var4 %>% str_split('_') %>% sapply(function(e) e[[3]])
p <- covoutp0 %>% filter(noise == '0.9', p == '2000') %>% 
  ggplot(aes(x=Var1, y=log(value))) + 
  geom_point(aes(shape=Var5)) + 
  facet_grid(N ~ Var2+Var3, scales='free')
p <- covoutp0 %>% 
    ggplot(aes(x=Var1, y=log(value))) + 
    geom_point(aes(shape = p, col = noise)) + 
    facet_grid(N ~ Var2+Var3, scales='free')
plotly::ggplotly(p)